suppressWarnings(library(MendelianRandomization))
suppressWarnings(library(digest))
suppressWarnings(library(tidyr))
suppressWarnings(library(MRPRESSO))
suppressWarnings(library(rowr))
suppressWarnings(library(ggplot2))
suppressWarnings(library(dplyr))
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:rowr':
##
## coalesce, count
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants')
diabetes_b<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Diabetes/diabetes_b.rds')
diabetes_p<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Diabetes/diabetes_p.rds')
breast<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Breast Cancer/breast.rds')
prostate<-readRDS('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/Prostate Cancer/prostate.rds')
breast= separate(data=breast, col=phase3_1kg_id, into=c('SNP','rest'), sep= "\\:")
## Warning: Expected 2 pieces. Additional pieces discarded in 111 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
groups<-read.csv('C:/Users/marta/Desktop/IC/Thesis/Genetic Variants/groups.csv', stringsAsFactors = TRUE, header=TRUE)
rs_b<-breast$SNP
index<-match(rs_b,groups$RS_ID)
groups_b<-groups[index,]
rs_p<-prostate$SNP
index<-match(rs_p,groups$RS_ID)
groups_p<-groups[index,]
MRInputObject_breast <- mr_input(bx = diabetes_b$Effect,
bxse = diabetes_b$StdErr,
by = breast$bcac_onco_icogs_gwas_beta,
byse = breast$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=rs_b)
MRAllObject_all_breast <- mr_allmethods(MRInputObject_breast, method = "all")
MRAllObject_all_breast
## Method Estimate Std Error 95% CI P-value
## Simple median -0.004 0.019 -0.041 0.033 0.837
## Weighted median 0.011 0.018 -0.024 0.045 0.538
## Penalized weighted median -0.004 0.018 -0.039 0.031 0.825
##
## IVW 0.000 0.017 -0.033 0.033 0.998
## Penalized IVW -0.007 0.013 -0.033 0.019 0.581
## Robust IVW 0.005 0.023 -0.040 0.050 0.819
## Penalized robust IVW -0.008 0.016 -0.040 0.024 0.612
##
## MR-Egger 0.029 0.035 -0.040 0.098 0.407
## (intercept) -0.003 0.003 -0.009 0.003 0.344
## Penalized MR-Egger 0.003 0.031 -0.058 0.064 0.926
## (intercept) -0.001 0.002 -0.006 0.004 0.743
## Robust MR-Egger 0.046 0.066 -0.083 0.176 0.484
## (intercept) -0.004 0.005 -0.013 0.006 0.447
## Penalized robust MR-Egger -0.005 0.041 -0.085 0.076 0.908
## (intercept) 0.000 0.003 -0.006 0.006 0.939
mr_plot(MRInputObject_breast,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast)
#MR-PRESSO breast
input = data.frame(betaY=breast$bcac_onco_icogs_gwas_beta, seY=breast$bcac_onco_icogs_gwas_se,beta_diabetes=diabetes_b$Effect, se_diabetes=diabetes_b$StdErr,row.names= NULL)
mrpresso_out = mr_presso(BetaOutcome = "betaY", BetaExposure = c("beta_diabetes"), SdOutcome = "seY", SdExposure="se_diabetes", data=input, OUTLIERtest = TRUE, DISTORTIONtest = TRUE, NbDistribution = 10000)
mrpresso_out$`Main MR results`
## Exposure MR Analysis Causal Estimate Sd T-stat
## 1 beta_diabetes Raw 4.022679e-05 0.01704120 0.002360562
## 2 beta_diabetes Outlier-corrected -7.200820e-03 0.01400442 -0.514181786
## P-value
## 1 0.9981208
## 2 0.6082263
mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Distortion Coefficient`
## beta_diabetes
## 100.5586
mrpresso_out$`MR-PRESSO results`$`Distortion Test`$Pvalue
## [1] 0.222
out_indices = mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`
outliers = rs_b[out_indices]
data_b<-cbind(diabetes_b, breast)
data_b <- data_b[, !duplicated(colnames(data_b))]
# creating a scatterplot
ggplot(data = data_b, aes(x = diabetes_b$Effect, y = breast$bcac_onco_icogs_gwas_beta, color =ifelse(data_b$SNP%in%outliers,yes='red',no='blue'))) +
geom_point(size=2) +
labs(title = "MR analysis\n", x = "T2D", y = "Breast cancer", color = "Genetic variants\n") +
scale_color_manual(labels = c("Rest of SNPs", "outliers"), values = c("blue", "red")) +
theme_bw() +
theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 16),
axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16),
plot.title = element_text(size = 20, face = "bold", color = "darkgreen"))
# creating a scatterplot
ggplot(data = data_b, aes(x = diabetes_b$Effect, y = breast$bcac_onco_icogs_gwas_beta)) +
geom_point(aes(color=factor(groups_b$TRAIT)),size=2) +
labs(title = "MR analysis\n", x = "T2D", y = "Breast cancer", color = "Genetic variants\n")
# Stratified MR analyses
data_b<-cbind(data_b, groups_b)
data_b_insulin<-data_b[data_b$TRAIT=='INSULIN RESISTANCE',]
MRInputObject_breast_insulin <- mr_input(bx = data_b_insulin$Effect,
bxse = data_b_insulin$StdErr,
by = data_b_insulin$bcac_onco_icogs_gwas_beta,
byse = data_b_insulin$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=data_b_insulin$SNP)
MRAllObject_all_breast_insulin <- mr_allmethods(MRInputObject_breast_insulin, method = "all")
MRAllObject_all_breast_insulin
## Method Estimate Std Error 95% CI P-value
## Simple median -0.051 0.030 -0.110 0.008 0.091
## Weighted median 0.075 0.027 0.021 0.128 0.006
## Penalized weighted median 0.104 0.022 0.061 0.147 0.000
##
## IVW 0.013 0.026 -0.038 0.065 0.609
## Penalized IVW -0.065 0.024 -0.112 -0.018 0.007
## Robust IVW -0.013 0.084 -0.178 0.151 0.874
## Penalized robust IVW -0.069 0.024 -0.117 -0.021 0.005
##
## MR-Egger 0.129 0.050 0.031 0.227 0.010
## (intercept) -0.015 0.006 -0.027 -0.004 0.009
## Penalized MR-Egger 0.142 0.048 0.049 0.236 0.003
## (intercept) -0.016 0.005 -0.027 -0.005 0.004
## Robust MR-Egger 0.133 0.057 0.021 0.245 0.020
## (intercept) -0.016 0.006 -0.027 -0.004 0.006
## Penalized robust MR-Egger 0.145 0.045 0.057 0.233 0.001
## (intercept) -0.016 0.005 -0.026 -0.007 0.001
mr_plot(MRInputObject_breast_insulin,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast_insulin)
data_b<-cbind(data_b, groups_b)
data_b_glucose<-data_b[data_b$TRAIT=='GLUCOSE',]
MRInputObject_breast_glucose <- mr_input(bx = data_b_glucose$Effect,
bxse = data_b_glucose$StdErr,
by = data_b_glucose$bcac_onco_icogs_gwas_beta,
byse = data_b_glucose$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=data_b_glucose$SNP)
MRAllObject_all_breast_glucose <- mr_allmethods(MRInputObject_breast_glucose, method = "all")
MRAllObject_all_breast_glucose
## Method Estimate Std Error 95% CI P-value
## Simple median 0.014 0.039 -0.063 0.090 0.726
## Weighted median -0.031 0.038 -0.106 0.043 0.411
## Penalized weighted median -0.024 0.038 -0.099 0.051 0.529
##
## IVW -0.021 0.042 -0.102 0.061 0.619
## Penalized IVW 0.017 0.034 -0.049 0.083 0.618
## Robust IVW -0.013 0.039 -0.089 0.064 0.748
## Penalized robust IVW 0.011 0.031 -0.050 0.072 0.724
##
## MR-Egger -0.083 0.133 -0.344 0.177 0.530
## (intercept) 0.006 0.011 -0.016 0.028 0.618
## Penalized MR-Egger -0.090 0.090 -0.266 0.086 0.318
## (intercept) 0.008 0.007 -0.007 0.022 0.283
## Robust MR-Egger -0.084 0.112 -0.303 0.135 0.450
## (intercept) 0.006 0.009 -0.011 0.024 0.483
## Penalized robust MR-Egger -0.091 0.079 -0.246 0.065 0.253
## (intercept) 0.008 0.007 -0.006 0.022 0.256
mr_plot(MRInputObject_breast_glucose,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast_glucose)
data_b<-cbind(data_b, groups_b)
data_b_lipid<-data_b[data_b$TRAIT=='LIPID',]
MRInputObject_breast_lipid <- mr_input(bx = data_b_lipid$Effect,
bxse = data_b_lipid$StdErr,
by = data_b_lipid$bcac_onco_icogs_gwas_beta,
byse = data_b_lipid$bcac_onco_icogs_gwas_se, outcome = 'breast cancer', exposure='diabetes',snps=data_b_lipid$SNP)
MRAllObject_all_breast_lipid <- mr_allmethods(MRInputObject_breast_lipid, method = "all")
MRAllObject_all_breast_lipid
## Method Estimate Std Error 95% CI P-value
## Simple median -0.052 0.092 -0.232 0.128 0.574
## Weighted median -0.022 0.063 -0.145 0.101 0.720
## Penalized weighted median -0.021 0.063 -0.145 0.102 0.736
##
## IVW -0.042 0.097 -0.233 0.148 0.664
## Penalized IVW -0.012 0.052 -0.114 0.091 0.825
## Robust IVW -0.026 0.036 -0.097 0.045 0.476
## Penalized robust IVW -0.015 0.039 -0.091 0.061 0.693
##
## MR-Egger 0.192 0.203 -0.205 0.590 0.343
## (intercept) -0.014 0.011 -0.035 0.007 0.195
## Penalized MR-Egger 0.192 0.203 -0.205 0.590 0.343
## (intercept) -0.014 0.011 -0.035 0.007 0.195
## Robust MR-Egger 0.173 0.191 -0.201 0.547 0.365
## (intercept) -0.013 0.013 -0.038 0.013 0.321
## Penalized robust MR-Egger 0.173 0.191 -0.201 0.547 0.365
## (intercept) -0.013 0.013 -0.038 0.013 0.321
mr_plot(MRInputObject_breast_lipid,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_breast_lipid)
MRInputObject_prostate <- mr_input(bx = diabetes_p$Effect,
bxse = diabetes_p$StdErr,
by = prostate$Effect,
byse = prostate$StdErr, outcome = 'prostate cancer', exposure='diabetes',snps=rs_p)
MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate, method = "all")
MRAllObject_all_prostate
## Method Estimate Std Error 95% CI P-value
## Simple median 0.011 0.024 -0.035 0.058 0.632
## Weighted median 0.001 0.022 -0.042 0.045 0.947
## Penalized weighted median -0.005 0.022 -0.049 0.039 0.819
##
## IVW -0.041 0.042 -0.124 0.042 0.333
## Penalized IVW -0.010 0.016 -0.042 0.022 0.543
## Robust IVW -0.007 0.018 -0.042 0.028 0.690
## Penalized robust IVW -0.012 0.016 -0.043 0.020 0.469
##
## MR-Egger -0.113 0.100 -0.309 0.084 0.260
## (intercept) 0.006 0.007 -0.009 0.021 0.428
## Penalized MR-Egger -0.048 0.038 -0.122 0.027 0.213
## (intercept) 0.003 0.003 -0.003 0.009 0.297
## Robust MR-Egger -0.020 0.039 -0.097 0.056 0.603
## (intercept) 0.001 0.003 -0.004 0.006 0.690
## Penalized robust MR-Egger -0.045 0.032 -0.108 0.017 0.155
## (intercept) 0.003 0.002 -0.002 0.007 0.274
mr_plot(MRInputObject_prostate,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)
#MR-PRESSO prostate
input = data.frame(betaY=prostate$Effect,seY=prostate$StdErr,beta_diabetes=diabetes_p$Effect,se_diabetes=diabetes_p$StdErr, row.names= NULL)
mrpresso_out = mr_presso(BetaOutcome = "betaY", BetaExposure = c("beta_diabetes"), SdOutcome = "seY", SdExposure="se_diabetes", data=input, OUTLIERtest = TRUE, DISTORTIONtest = TRUE, NbDistribution = 10000)
mrpresso_out$`Main MR results`
## Exposure MR Analysis Causal Estimate Sd T-stat
## 1 beta_diabetes Raw -0.04085910 0.04223478 -0.9674276
## 2 beta_diabetes Outlier-corrected -0.01429709 0.01722820 -0.8298654
## P-value
## 1 0.3360120
## 2 0.4089954
mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Distortion Coefficient`
## beta_diabetes
## -185.7861
mrpresso_out$`MR-PRESSO results`$`Distortion Test`$Pvalue
## [1] 0.034
out_indices = mrpresso_out$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`
outliers=rs_p[out_indices]
colnames(prostate)[13]<-'Effect_p'
colnames(prostate)[14]<-'StdErr_p'
data_p<-cbind(diabetes_p, prostate)
data_p <- data_p[, !duplicated(colnames(data_p))]
ggplot(data = data_p, aes(x = diabetes_p$Effect, y = prostate$Effect, color =ifelse(data_p$SNP%in%outliers,yes='red',no='blue'))) +
geom_point(size=2) +
labs(title = "MR analysis\n", x = "T2D", y = "Prostate cancer", color = "Genetic variants\n") +
scale_color_manual(labels = c("Rest of SNPs", "outliers"), values = c("blue", "red")) +
theme_bw() +
theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 16),
axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16),
plot.title = element_text(size = 20, face = "bold", color = "darkgreen"))
# #extract rs numbers below 5x10^-8
#
# setwd('C:/Users/marta/Desktop/IC/Thesis/Genetic variants/Pathways')
#
# # where is the summary data stored?
# name_in = "/Users/marta/Desktop/IC/Thesis/Genetic variants/Pathways/"
# # what are the files in the folder?
# os_list_dir=list.files(name_in)
# os_list_dir=sort(os_list_dir)
#
# os_list_dir
#
#
# # where to save the summary data for the instruments?
# name_out = "/Users/marta/Desktop/IC/Thesis/New"
#
# if(!file.exists(name_out)){dir.create(file.path(name_out))}
#
#
# for (i in 1:length(os_list_dir)){
# print(paste("reading: ", os_list_dir[i]))
# data<-read.csv(os_list_dir[i], stringsAsFactors = FALSE, header = TRUE)
# output1 = cbind.fill(output1,data[,10], fill=NA)
# }
# col
# #colnames(output1)<-sub(".csv","",col)
#
# filename = sprintf("pathways.csv", name_out, rf)
#
# write.csv(file=filename, output)
ggplot(data = data_p, aes(x = diabetes_p$Effect, y = prostate$Effect)) +
geom_point(aes(color=factor(groups_p$TRAIT)),size=2) +
labs(title = "MR analysis\n", x = "T2D", y = "Prostate cancer", color = "Genetic variants\n")
data_p<-cbind(data_p, groups_p)
data_p_insulin<-data_p[data_p$TRAIT=='INSULIN RESISTANCE',]
MRInputObject_prostate_insulin <- mr_input(bx = data_p_insulin$Effect,
bxse = data_p_insulin$StdErr,
by = data_p_insulin$Effect_p,
byse = data_p_insulin$StdErr_p, outcome = 'prostate cancer', exposure='diabetes',snps=data_p_insulin$SNP)
MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate_insulin, method = "all")
MRAllObject_all_prostate
## Method Estimate Std Error 95% CI P-value
## Simple median 0.036 0.039 -0.039 0.112 0.346
## Weighted median -0.004 0.036 -0.074 0.066 0.918
## Penalized weighted median -0.018 0.036 -0.088 0.052 0.615
##
## IVW 0.024 0.038 -0.049 0.098 0.517
## Penalized IVW 0.005 0.033 -0.059 0.070 0.868
## Robust IVW 0.017 0.036 -0.054 0.087 0.645
## Penalized robust IVW 0.003 0.028 -0.053 0.059 0.915
##
## MR-Egger -0.093 0.095 -0.280 0.094 0.331
## (intercept) 0.012 0.009 -0.006 0.030 0.183
## Penalized MR-Egger -0.139 0.077 -0.290 0.012 0.071
## (intercept) 0.015 0.007 0.000 0.029 0.043
## Robust MR-Egger -0.123 0.075 -0.270 0.023 0.099
## (intercept) 0.014 0.008 -0.002 0.031 0.083
## Penalized robust MR-Egger -0.146 0.065 -0.274 -0.019 0.025
## (intercept) 0.016 0.008 0.000 0.032 0.050
mr_plot(MRInputObject_prostate_insulin,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)
data_p<-cbind(data_p, groups_p)
data_p_glucose<-data_p[data_p$TRAIT=='GLUCOSE',]
MRInputObject_prostate_glucose <- mr_input(bx = data_p_glucose$Effect,
bxse = data_p_glucose$StdErr,
by = data_p_glucose$Effect_p,
byse = data_p_glucose$StdErr_p, outcome = 'prostate cancer', exposure='diabetes',snps=data_p_glucose$SNP)
MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate_glucose, method = "all")
MRAllObject_all_prostate
## Method Estimate Std Error 95% CI P-value
## Simple median 0.030 0.061 -0.089 0.149 0.622
## Weighted median -0.057 0.052 -0.159 0.045 0.274
## Penalized weighted median -0.057 0.052 -0.159 0.045 0.274
##
## IVW -0.017 0.038 -0.092 0.059 0.662
## Penalized IVW -0.017 0.038 -0.092 0.059 0.662
## Robust IVW -0.018 0.040 -0.097 0.061 0.662
## Penalized robust IVW -0.018 0.040 -0.097 0.061 0.662
##
## MR-Egger -0.137 0.125 -0.383 0.108 0.273
## (intercept) 0.010 0.010 -0.009 0.030 0.312
## Penalized MR-Egger -0.137 0.125 -0.383 0.108 0.273
## (intercept) 0.010 0.010 -0.009 0.030 0.312
## Robust MR-Egger -0.165 0.723 -1.583 1.252 0.819
## (intercept) 0.010 0.022 -0.033 0.054 0.636
## Penalized robust MR-Egger -0.165 0.723 -1.583 1.252 0.819
## (intercept) 0.010 0.022 -0.033 0.054 0.636
mr_plot(MRInputObject_prostate_glucose,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)
data_p<-cbind(data_p, groups_p)
data_p_lipid<-data_p[data_p$TRAIT=='LIPID',]
MRInputObject_prostate_lipid <- mr_input(bx = data_p_lipid$Effect,
bxse = data_p_lipid$StdErr,
by = data_p_lipid$Effect_p,
byse = data_p_lipid$StdErr_p, outcome = 'prostate cancer', exposure='diabetes',snps=data_p_lipid$SNP)
MRAllObject_all_prostate <- mr_allmethods(MRInputObject_prostate_lipid, method = "all")
MRAllObject_all_prostate
## Method Estimate Std Error 95% CI P-value
## Simple median -0.168 0.209 -0.576 0.241 0.421
## Weighted median -0.227 0.135 -0.491 0.037 0.092
## Penalized weighted median -0.227 0.135 -0.491 0.037 0.092
##
## IVW -0.207 0.105 -0.413 -0.001 0.049
## Penalized IVW -0.207 0.105 -0.413 -0.001 0.049
## Robust IVW -0.207 0.072 -0.349 -0.066 0.004
## Penalized robust IVW -0.207 0.072 -0.349 -0.066 0.004
##
## MR-Egger -0.287 0.221 -0.720 0.146 0.194
## (intercept) 0.004 0.009 -0.014 0.022 0.680
## Penalized MR-Egger -0.287 0.221 -0.720 0.146 0.194
## (intercept) 0.004 0.009 -0.014 0.022 0.680
## Robust MR-Egger -0.288 0.067 -0.418 -0.157 0.000
## (intercept) 0.004 0.004 -0.005 0.012 0.375
## Penalized robust MR-Egger -0.288 0.067 -0.418 -0.157 0.000
## (intercept) 0.004 0.004 -0.005 0.012 0.375
mr_plot(MRInputObject_prostate_lipid,
error = TRUE, orientate = FALSE, line = "ivw")
mr_plot(MRAllObject_all_prostate)